"""
    pyspark: analysis using Spark Streaming
        1. When performing streaming computation, a table structure (schema) needs to be defined.

"""

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark import SparkContext, SparkConf,SQLContext
from operator import add
# Streaming word count: read text files as they appear in a directory, split
# each line on single spaces, and print per-batch word frequencies.

# Spark configuration: application name 'TestDStream', local master 'local[2]'.
conf = SparkConf().setAppName('TestDStream').setMaster('local[2]')
sc = SparkContext(conf=conf)

# Streaming context on top of the SparkContext, with a batch interval of 1.
ssc = StreamingContext(sc, 1)
# SQL context bound to the same SparkContext; not used by the pipeline below.
sql_context = SQLContext(ssc.sparkContext)

# textFileStream() monitors a *directory* for newly created files. Pointing it
# at a single file (the previous value ended in '\\Harry.txt') produces empty
# batches, so watch the containing directory instead. Only files that appear
# in the directory after the stream has started are picked up.
INPUT_DIR = 'E:\\Python\\pyspark_demo01'
lines = ssc.textFileStream(INPUT_DIR)

# One record per space-separated token, then (word, 1) pairs summed per word
# within each batch.
words = lines.flatMap(lambda line: line.split(' '))
wordCounts = words.map(lambda word: (word, 1)).reduceByKey(add)

# Print the first elements of every batch's result to stdout.
wordCounts.pprint()

try:
    ssc.start()
    # Block until the streaming job is stopped or fails.
    ssc.awaitTermination()
finally:
    # Release the streaming context (and, by default, the underlying
    # SparkContext) even when interrupted or when the job raised.
    ssc.stop()
